{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The n-gram processing functions are adapted from the [Keras fastText IMDB example](https://github.com/keras-team/keras/blob/master/examples/imdb_fasttext.py)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Config:\n",
    "    \"\"\"Hyperparameters shared by the preprocessing, model and training cells.\"\"\"\n",
    "\n",
    "    # --- data / vocabulary ---\n",
    "    max_features = 20000  # passed to imdb.load_data as num_words; build_ngram() later overwrites it\n",
    "    maxlen = 400          # length handed to pad_sequences for every review\n",
    "    ngram_range = 2       # highest n-gram order appended to the sequences (2 = bigrams)\n",
    "\n",
    "    # --- model ---\n",
    "    embedding_dims = 50   # embedding width handed to embed_sequence\n",
    "\n",
    "    # --- training ---\n",
    "    batch_size = 32       # batch size for both the train and the predict input_fn\n",
    "    epochs = 5            # number of train-then-validate rounds executed by main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_ngram_set(input_list, ngram_value):\n",
    "    \"\"\"Return the set of distinct contiguous n-grams (as tuples) found in a sequence.\n",
    "\n",
    "    Args:\n",
    "        input_list: sliceable sequence of tokens, e.g. a list of word ids.\n",
    "        ngram_value: n-gram order (2 for bigrams, 3 for trigrams, ...).\n",
    "\n",
    "    Returns:\n",
    "        A set of ``ngram_value``-length tuples; empty when the sequence is\n",
    "        shorter than ``ngram_value``.\n",
    "\n",
    "    Example:\n",
    "        >>> sorted(create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2))\n",
    "        [(1, 4), (4, 1), (4, 9), (9, 4)]\n",
    "    \"\"\"\n",
    "    # The k-th shifted copy starts at token k, so zipping the copies together\n",
    "    # lines up each token with its next (ngram_value - 1) successors.\n",
    "    shifted_views = []\n",
    "    for offset in range(ngram_value):\n",
    "        shifted_views.append(input_list[offset:])\n",
    "    return set(zip(*shifted_views))\n",
    "\n",
    "\n",
    "def build_ngram(x_train):\n",
    "    \"\"\"Assign a fresh integer id to every n-gram (orders 2..Config.ngram_range) in ``x_train``.\n",
    "\n",
    "    New ids start at ``Config.max_features + 1`` so they sit above the existing id range.\n",
    "\n",
    "    Side effect:\n",
    "        ``Config.max_features`` is overwritten with one past the largest id handed out.\n",
    "        When no n-gram is found (e.g. ``Config.ngram_range == 1`` or empty data) it is\n",
    "        left untouched. Because the start index is derived from ``Config.max_features``,\n",
    "        calling this function repeatedly keeps raising that value.\n",
    "\n",
    "    Args:\n",
    "        x_train: sized iterable of token sequences.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each n-gram tuple to its integer id (empty if there are no n-grams).\n",
    "    \"\"\"\n",
    "    ngram_set = set()\n",
    "    for input_list in tqdm(x_train, total=len(x_train), ncols=70):\n",
    "        for order in range(2, Config.ngram_range + 1):\n",
    "            ngram_set.update(create_ngram_set(input_list, ngram_value=order))\n",
    "\n",
    "    start_index = Config.max_features + 1\n",
    "    token_indice = {ngram: start_index + offset for offset, ngram in enumerate(ngram_set)}\n",
    "\n",
    "    if token_indice:\n",
    "        # Ids are contiguous from start_index, so max(id) + 1 == start_index + count.\n",
    "        # Computing it this way avoids np.max() on an empty list, which raises ValueError.\n",
    "        Config.max_features = start_index + len(token_indice)\n",
    "    return token_indice\n",
    "\n",
    "\n",
    "def add_ngram(sequences, token_indice):\n",
    "    \"\"\"Append the ids of known n-grams to the end of every sequence.\n",
    "\n",
    "    For each order from 2 to ``Config.ngram_range`` a window slides over the original\n",
    "    tokens of a sequence; whenever the window is a key of ``token_indice`` its id is\n",
    "    appended to that sequence's copy. The inputs themselves are not modified.\n",
    "\n",
    "    Args:\n",
    "        sequences: sized iterable of token sequences (lists, tuples or arrays).\n",
    "        token_indice: dict mapping n-gram tuples to integer ids, as built by build_ngram().\n",
    "\n",
    "    Returns:\n",
    "        list of lists: each original sequence followed by the ids of its matched n-grams.\n",
    "    \"\"\"\n",
    "    new_sequences = []\n",
    "    for input_list in tqdm(sequences, total=len(sequences), ncols=70):\n",
    "        # list() rather than [:] so non-list sequences (tuple, ndarray) can be extended too.\n",
    "        tokens = list(input_list)\n",
    "        new_list = list(tokens)\n",
    "        for ngram_value in range(2, Config.ngram_range + 1):\n",
    "            # Scan only the original tokens: windows must not run into the n-gram ids\n",
    "            # that lower orders have already appended to new_list.\n",
    "            for i in range(len(tokens) - ngram_value + 1):\n",
    "                ngram = tuple(tokens[i:i + ngram_value])\n",
    "                if ngram in token_indice:\n",
    "                    new_list.append(token_indice[ngram])\n",
    "        new_sequences.append(new_list)\n",
    "    return new_sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def model_fn(features, labels, mode):\n",
    "    \"\"\"Estimator ``model_fn``: embed the token ids, average over axis 1, classify into 2 classes.\n",
    "\n",
    "    Args:\n",
    "        features: integer token ids (presumably shaped (batch, Config.maxlen), as fed by main()).\n",
    "        labels: integer class ids; unused in PREDICT mode.\n",
    "        mode: a ``tf.estimator.ModeKeys`` value.\n",
    "\n",
    "    Returns:\n",
    "        ``tf.estimator.EstimatorSpec`` holding\n",
    "        - PREDICT: the argmax class id per example,\n",
    "        - EVAL: the mean cross-entropy loss and an ``accuracy`` metric,\n",
    "        - TRAIN: the loss and an Adam training op.\n",
    "    \"\"\"\n",
    "    embedded = tf.contrib.layers.embed_sequence(features, Config.max_features, Config.embedding_dims)\n",
    "    logits = tf.layers.dense(tf.reduce_mean(embedded, 1), 2)\n",
    "    predicted_classes = tf.argmax(logits, -1)\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.PREDICT:\n",
    "        return tf.estimator.EstimatorSpec(mode,\n",
    "                                          predictions=predicted_classes)\n",
    "\n",
    "    # Both EVAL and TRAIN need the loss.\n",
    "    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "        logits=logits,\n",
    "        labels=labels))\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.EVAL:\n",
    "        # Without this branch the function returned None for EVAL,\n",
    "        # which made estimator.evaluate() unusable.\n",
    "        accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes)\n",
    "        return tf.estimator.EstimatorSpec(mode=mode,\n",
    "                                          loss=loss_op,\n",
    "                                          eval_metric_ops={\"accuracy\": accuracy})\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "        train_op = tf.train.AdamOptimizer().minimize(loss_op,\n",
    "                                                     global_step=tf.train.get_global_step())\n",
    "\n",
    "        return tf.estimator.EstimatorSpec(mode=mode,\n",
    "                                          loss=loss_op,\n",
    "                                          train_op=train_op)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    \"\"\"Train the n-gram bag-of-embeddings classifier on IMDB and print validation accuracy.\n",
    "\n",
    "    Steps: load the IMDB reviews, build the n-gram vocabulary from the training split,\n",
    "    append n-gram ids to both splits, pad to ``Config.maxlen``, then run\n",
    "    ``Config.epochs`` rounds of train-then-predict, printing the accuracy on the test\n",
    "    split after each round.\n",
    "\n",
    "    ``build_ngram`` overwrites ``Config.max_features``; the original value is restored on\n",
    "    exit so that re-running this function starts from the same vocabulary size.\n",
    "    \"\"\"\n",
    "    base_max_features = Config.max_features\n",
    "    try:\n",
    "        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=Config.max_features)\n",
    "\n",
    "        token_indice = build_ngram(x_train)\n",
    "        x_train_ngram = add_ngram(x_train, token_indice)\n",
    "        x_test_ngram = add_ngram(x_test, token_indice)\n",
    "        x_train_padded = tf.keras.preprocessing.sequence.pad_sequences(x_train_ngram, Config.maxlen)\n",
    "        x_test_padded = tf.keras.preprocessing.sequence.pad_sequences(x_test_ngram, Config.maxlen)\n",
    "\n",
    "        estimator = tf.estimator.Estimator(model_fn)\n",
    "\n",
    "        # The input functions do not change between rounds, so build them once.\n",
    "        train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "            x=x_train_padded,\n",
    "            y=y_train,\n",
    "            batch_size=Config.batch_size,\n",
    "            shuffle=True)\n",
    "        test_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "            x=x_test_padded,\n",
    "            batch_size=Config.batch_size,\n",
    "            shuffle=False)\n",
    "\n",
    "        for _ in range(Config.epochs):\n",
    "            estimator.train(train_input_fn)\n",
    "            y_pred = np.fromiter(estimator.predict(test_input_fn), np.int32, count=len(x_test_padded))\n",
    "            print(\"\\nValidation Accuracy: %.4f\\n\" % (y_pred==y_test).mean())\n",
    "    finally:\n",
    "        # Undo build_ngram()'s change so a second run does not load a different vocabulary.\n",
    "        Config.max_features = base_max_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████| 25000/25000 [00:02<00:00, 10699.32it/s]\n",
      "100%|█████████████████████████| 25000/25000 [00:07<00:00, 3485.76it/s]\n",
      "100%|█████████████████████████| 25000/25000 [00:06<00:00, 4045.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using default config.\n",
      "WARNING:tensorflow:Using temporary folder as model directory: /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6\n",
      "INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x113b72f28>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 1 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:loss = 0.69293195, step = 1\n",
      "INFO:tensorflow:global_step/sec: 2.40842\n",
      "INFO:tensorflow:loss = 0.68844193, step = 101 (41.522 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.32299\n",
      "INFO:tensorflow:loss = 0.614252, step = 201 (43.048 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.50012\n",
      "INFO:tensorflow:loss = 0.5439607, step = 301 (39.998 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.43881\n",
      "INFO:tensorflow:loss = 0.54099536, step = 401 (41.004 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.35875\n",
      "INFO:tensorflow:loss = 0.54534197, step = 501 (42.397 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.36049\n",
      "INFO:tensorflow:loss = 0.37965918, step = 601 (42.363 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.38828\n",
      "INFO:tensorflow:loss = 0.3910194, step = 701 (41.871 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 782 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.32732564.\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-782\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "\n",
      "Validation Accuracy: 0.8802\n",
      "\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-782\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 783 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:loss = 0.24241993, step = 783\n",
      "INFO:tensorflow:global_step/sec: 2.668\n",
      "INFO:tensorflow:loss = 0.2724725, step = 883 (37.482 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.50411\n",
      "INFO:tensorflow:loss = 0.23133731, step = 983 (39.935 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.606\n",
      "INFO:tensorflow:loss = 0.16490807, step = 1083 (38.373 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.67103\n",
      "INFO:tensorflow:loss = 0.22434165, step = 1183 (37.439 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.58605\n",
      "INFO:tensorflow:loss = 0.13659978, step = 1283 (38.669 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.58788\n",
      "INFO:tensorflow:loss = 0.14824846, step = 1383 (38.642 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.58916\n",
      "INFO:tensorflow:loss = 0.102174014, step = 1483 (38.622 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1564 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.09975465.\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-1564\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "\n",
      "Validation Accuracy: 0.8999\n",
      "\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-1564\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 1565 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:loss = 0.03993909, step = 1565\n",
      "INFO:tensorflow:global_step/sec: 2.41597\n",
      "INFO:tensorflow:loss = 0.058302276, step = 1665 (41.392 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.43685\n",
      "INFO:tensorflow:loss = 0.16528523, step = 1765 (41.037 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.41689\n",
      "INFO:tensorflow:loss = 0.07849357, step = 1865 (41.376 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.27655\n",
      "INFO:tensorflow:loss = 0.068547964, step = 1965 (43.926 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.3921\n",
      "INFO:tensorflow:loss = 0.08757648, step = 2065 (41.804 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.28515\n",
      "INFO:tensorflow:loss = 0.05769537, step = 2165 (43.761 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.55133\n",
      "INFO:tensorflow:loss = 0.058614854, step = 2265 (39.196 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2346 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.04953124.\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-2346\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "\n",
      "Validation Accuracy: 0.9051\n",
      "\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-2346\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 2347 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:loss = 0.048433587, step = 2347\n",
      "INFO:tensorflow:global_step/sec: 2.26178\n",
      "INFO:tensorflow:loss = 0.059851557, step = 2447 (44.214 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.5061\n",
      "INFO:tensorflow:loss = 0.018525608, step = 2547 (39.903 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.44038\n",
      "INFO:tensorflow:loss = 0.02814805, step = 2647 (40.978 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.1486\n",
      "INFO:tensorflow:loss = 0.064407185, step = 2747 (46.541 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.32765\n",
      "INFO:tensorflow:loss = 0.020313598, step = 2847 (42.962 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.31624\n",
      "INFO:tensorflow:loss = 0.035366714, step = 2947 (43.173 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.38908\n",
      "INFO:tensorflow:loss = 0.026188405, step = 3047 (41.857 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 3128 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.012904812.\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-3128\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "\n",
      "Validation Accuracy: 0.9055\n",
      "\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-3128\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 3129 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:loss = 0.022037622, step = 3129\n",
      "INFO:tensorflow:global_step/sec: 2.40032\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:loss = 0.030605339, step = 3229 (41.663 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.45992\n",
      "INFO:tensorflow:loss = 0.013529511, step = 3329 (40.651 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.27158\n",
      "INFO:tensorflow:loss = 0.014844401, step = 3429 (44.023 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.34276\n",
      "INFO:tensorflow:loss = 0.025394464, step = 3529 (42.684 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.19481\n",
      "INFO:tensorflow:loss = 0.020423576, step = 3629 (45.562 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.28575\n",
      "INFO:tensorflow:loss = 0.013294871, step = 3729 (43.749 sec)\n",
      "INFO:tensorflow:global_step/sec: 2.25662\n",
      "INFO:tensorflow:loss = 0.00724068, step = 3829 (44.314 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 3910 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.0055243005.\n",
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt-3910\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "\n",
      "Validation Accuracy: 0.9068\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run the whole pipeline: load IMDB, add n-gram ids, then train and validate for Config.epochs rounds.\n",
    "main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
